# Scrape a CSDN blog listing page and export the posts it lists.

import requests
from lxml import etree
import csv

# Blog listing page of the CSDN user whose posts are scraped.
url = 'https://blog.csdn.net/weixin_46035332?type=blog'

# Browser-like User-Agent header sent with the request.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36'}

# requests applies no timeout by default; without one a stalled server would
# block this script indefinitely.
resp = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of parsing an error page and
# silently producing an output file that contains only the header row.
resp.raise_for_status()
# Decode the body as UTF-8 regardless of what the response headers declare.
resp.encoding = 'utf-8'
# Parsed HTML tree, queried with XPath by the extraction code that follows.
e = etree.HTML(resp.text)

# Extract data: select every div with class "list-box-cont"
# (presumably one per blog post on the listing page -- confirm against the markup).
divs = e.xpath('//div[@class="list-box-cont"]')


def _first_text(node, path):
    """Return the first text node matched by *path* under *node*, stripped.

    Returns '' when the XPath matches nothing, so a missing field never raises
    IndexError and never depends on whether some other field was found.
    """
    matches = node.xpath(path)
    return matches[0].strip() if matches else ''


# One dict per matched div; the keys are the column names of the exported file.
movie_list = []

for div in divs:
    # Each field is looked up and guarded independently. Previously most fields
    # were guarded by the *title* lookup, which raised IndexError when the title
    # existed but the field did not, and dropped fields when the title was absent.
    movie_list.append({
        '标题': _first_text(
            div, './div/div[@class="blog-list-box-top"]/h4/text()'),
        '内容': _first_text(
            div, './div/div[@class="blog-list-content"]/text()'),
        # The date text carries a '发布博客' label that is removed here.
        '发布日期': _first_text(
            div,
            './div[@class="blog-list-footer"]/div/div[@class="view-time-box"]/text()',
        ).replace('发布博客', ''),
        '阅读数': _first_text(
            div,
            './div[@class="blog-list-footer"]/div/div[@class="view-num-box"]/span[@class="view-num"]/text()',
        ),
        '点赞数': _first_text(
            div,
            './div[@class="blog-list-footer"]/div/div[@class="give-like-box"]/span[@class="give-like-num"]/text()',
        ),
        # Comment and favourite counts are told apart only by position
        # (div[5] vs div[6]); both spans use the class "comment-num".
        '评论数': _first_text(
            div,
            './div[@class="blog-list-footer"]/div[@class="blog-list-footer-left"]/div[5]/span[@class="comment-num"]/text()',
        ),
        '收藏数': _first_text(
            div,
            './div[@class="blog-list-footer"]/div[@class="blog-list-footer-left"]/div[6]/span[@class="comment-num"]/text()',
        ),
    })

# Column order of the output file; DictWriter looks each name up as a key in
# every row dict.
columns = ('标题', '内容', '发布日期', '阅读数', '点赞数', '评论数', '收藏数')

# NOTE: the content written is CSV text even though the file name ends in .xls.
# 'utf-8-sig' puts a UTF-8 BOM at the start of the file; newline='' leaves line
# endings to the csv module.
with open('csdn博客.xls', mode='w', newline='', encoding='utf-8-sig') as out_file:
    csv_writer = csv.DictWriter(out_file, fieldnames=columns)
    # Header row first, then one row per collected post.
    csv_writer.writeheader()
    for row in movie_list:
        csv_writer.writerow(row)
